/**
 * 
 */
package com.reptile.core.webmagic.action;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small jsoup-based scraper that extracts JavaScript variable assignments from
 * inline {@code <script>} elements of a fetched page.
 *
 * <p>{@link #getSerieExtDetail(int)} collects selected variables from an
 * autohome.com.cn series configuration page; {@link #main(String[])} prints a
 * slice of the {@code cid1_api} variable from a Taobao open-platform page.
 *
 * @author wangzihang
 */
public class TaobaoReptile {
	/******* How to set a User-Agent *******/
	//	Jsoup.connect(url)
	//	.header("User-Agent","Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2")
	//	.get();
	
	//attr(href)
	/******* How to set a User-Agent END *******/
	
	
	/* Timeout, in milliseconds, applied when fetching a page. */
	private static final int TIMEOUT = 10000;
	
	/*
	 * Default Cookie request-header value used by main when no override is given.
	 * NOTE(review): this looks like a captured login session; credentials should
	 * not live in source control. Prefer passing the cookie as the first program
	 * argument and remove this literal once callers have migrated.
	 */
	private static final String DEFAULT_COOKIE = "miid=168704831389634083; t=41f67944a12e9969952928576433d6e8; hng=CN%7Czh-CN%7CCNY%7C156; cna=ZrFQFMoRJg0CAXzsknUn4osN; tg=0; thw=cn; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1; tracknick=%5Cu6253%5Cu4E2A%5Cu6298%5Cu6263%5Cu5427%5Cu8001%5Cu677F; lgc=%5Cu6253%5Cu4E2A%5Cu6298%5Cu6263%5Cu5427%5Cu8001%5Cu677F; mt=ci=0_0&np=; v=0; cookie2=2e4a27fdde4f1146febb184086241612; _tb_token_=e134783383be0; dnk=%5Cu6253%5Cu4E2A%5Cu6298%5Cu6263%5Cu5427%5Cu8001%5Cu677F; JSESSIONID=0FBF58D7237E98BB0CB4B722E5A866A1; unb=1953598004; uc1=cookie16=VT5L2FSpNgq6fDudInPRgavC%2BQ%3D%3D&cookie21=Vq8l%2BKCLjhS4UhJVbhgU&cookie15=UtASsssmOIJ0bQ%3D%3D&existShop=false&pas=0&cookie14=UoTYNOgaFI%2B6HA%3D%3D&tag=8&lng=zh_CN; sg=%E6%9D%BF44; _l_g_=Ug%3D%3D; skt=c1f494df14ef2580; cookie1=B0f27vpFGP4y9D2e%2BCE9z5ZBSansgOiDnBz5PUG0FNY%3D; csg=d5825d65; uc3=vt3=F8dByR6umn2sDRASCVA%3D&id2=UojRY%2BjEQh%2BUVg%3D%3D&nk2=1z78Lj9jugNephaFUMA%3D&lg2=V32FPkk%2Fw0dUvg%3D%3D; existShop=MTU0MjcwMDAzOQ%3D%3D; _cc_=WqG3DMC9EA%3D%3D; _nk_=%5Cu6253%5Cu4E2A%5Cu6298%5Cu6263%5Cu5427%5Cu8001%5Cu677F; cookie17=UojRY%2BjEQh%2BUVg%3D%3D; isg=BHx8jrJ3C_yHrD8a1YTCnMHgTRru3SAwfnTlPFb9SmdKIRmrfoUwL1zTBQn8aVj3";
	
	/**
	 * Fetches the configuration page of a car series and returns the inline
	 * JavaScript variables whose declaration mentions {@code option},
	 * {@code config}, {@code color} or {@code innerColor}.
	 *
	 * <p>Only the script element at zero-based index 6 of the page is inspected.
	 * When a variable name occurs more than once, the first occurrence wins.
	 *
	 * @param serieId series identifier inserted into the page URL
	 * @return map from variable name to the raw text assigned to it (a
	 *         {@code String}, without the trailing {@code ;}); empty when the
	 *         page has no such script element or no matching variable
	 * @throws Exception if the page cannot be fetched
	 */
	public static Map<String, Object> getSerieExtDetail(int serieId) throws Exception{
		
		/* Series configuration page. */
		String serieInfo = "http://car.autohome.com.cn/config/series/"+serieId+".html";
		
		/* Collects the variables to return. */
		Map<String, Object> map = new HashMap<String, Object>();
		
		/* Fetch and parse the configuration page. */
		Document document = Jsoup.connect(serieInfo).timeout(TIMEOUT).get();
		
		/* eq(6) yields zero or one element, so the loop below is a safe "if present". */
		Elements scripts = document.getElementsByTag("script").eq(6);
		
		for (Element script : scripts) {
			for (String declaration : splitDeclarations(script.data())) {
				
				/* Skip fragments that are not assignments or not of interest. */
				if (!declaration.contains("=") || !isConfigDeclaration(declaration)) {
					continue;
				}
				
				String name = assignedName(declaration);
				if (!map.containsKey(name)) {
					map.put(name, stripTerminator(assignedValue(declaration)));
				}
			}
		}
		return map;
	}
	
	/* Splits script text into fragments, one per {@code var} keyword; matching the keyword (not the bare letters) avoids cutting inside words such as "variable". */
	private static String[] splitDeclarations(String script) {
		return script.split("\\bvar\\s+");
	}
	
	/* Tells whether a declaration fragment mentions one of the wanted keywords. */
	private static boolean isConfigDeclaration(String declaration) {
		return declaration.contains("option") || declaration.contains("config")
				|| declaration.contains("color") || declaration.contains("innerColor");
	}
	
	/* Returns the trimmed text before the first '='; the fragment must contain '='. */
	private static String assignedName(String declaration) {
		return declaration.substring(0, declaration.indexOf('=')).trim();
	}
	
	/* Returns the trimmed text after the first '=', keeping any later '=' characters; the fragment must contain '='. */
	private static String assignedValue(String declaration) {
		return declaration.substring(declaration.indexOf('=') + 1).trim();
	}
	
	/* Removes a single trailing ';' when present and leaves other values untouched. */
	private static String stripTerminator(String value) {
		if (value.endsWith(";")) {
			return value.substring(0, value.length() - 1).trim();
		}
		return value;
	}
	
	
	/**
	 * Fetches the Taobao API property-tools page and prints, for every
	 * {@code cid1_api} variable found in the script element at zero-based
	 * index 8, the part of its value that starts at the first {@code |} and ends
	 * two characters before the last {@code |}. Timing and a script count are
	 * printed at the end.
	 *
	 * @author wangzihang
	 * @param args optional; when present, {@code args[0]} is sent as the Cookie
	 *             request header instead of the built-in default
	 * @throws IOException if the page cannot be fetched
	 */
	public static void main(String[] args) throws IOException {

		// Timing
		System.out.println("开始爬取...");
		long startTime = System.currentTimeMillis();
		// Number of script elements processed
		int size = 0;
		
		// Page to scrape
		String url = "http://open.taobao.com/apitools/apiPropTools.htm?spm=0.0.0.0.mlPbbQ";
		String cookie = (args != null && args.length > 0) ? args[0] : DEFAULT_COOKIE;
		
		// The value is a complete Cookie header, so it is sent as a header;
		// Connection.cookies(Map) would treat it as ONE cookie named "Cookie".
		Document doc = Jsoup.connect(url).header("Cookie", cookie).get();
		
		// eq(8) yields zero or one script element
		Elements categoryScripts = doc.getElementsByTag("script").eq(8);
		
		// First-level categories
		for (Element script : categoryScripts) {
			for (String declaration : splitDeclarations(script.data())) {
				if (declaration.contains("=") && declaration.contains("cid1_api")) {
					String value = assignedValue(declaration);
					int from = value.indexOf("|");
					int to = value.lastIndexOf("|") - 1;
					
					// Guard against values with fewer than two usable '|' separators,
					// which would otherwise make substring throw.
					if (from >= 0 && to >= from) {
						System.out.println(value.substring(from, to));
					} else {
						System.err.println("Skipping cid1_api value without a '|'-delimited payload");
					}
				}
			}
			size++;
		}

		
		// Elapsed time
		System.out.println("----------------------------");
		long endTime = System.currentTimeMillis();
		System.out.println("爬取结束，耗时约" + ((endTime - startTime) / 1000) + "秒");
		System.out.println("共" + size + "数据");
	}

}
